from bs4 import BeautifulSoup

# Scrape the article list out of a locally saved HTML page and print the
# titles of every article rated above 3.
#
# `info` ends up holding one dict per article with the keys
# 'title', 'desc', 'rate', 'cate' and 'image'.

# The file is opened in binary mode so that BeautifulSoup detects the
# document's encoding itself instead of relying on the platform's default
# text encoding, which may not match the encoding the page was saved in.
with open(u"G:\\学习\\学习资料\\python\\Python实战：四周实现爬虫系统\\study\\week1_2\\web\\index.html", "rb") as file:
    soup = BeautifulSoup(file, 'lxml')

# The document is fully parsed when the BeautifulSoup object is built, so the
# queries below no longer need the file handle.
images = soup.select("body > div.main-content > ul > li > img")
titles = soup.select("body > div.main-content > ul > li > div.article-info > h3 > a")
descs = soup.select("body > div.main-content > ul > li > div.article-info > p.description")
rates = soup.select("body > div.main-content > ul > li > div.rate > span")
cates = soup.select("body > div.main-content > ul > li > div.article-info > p.meta-info")

# NOTE: zip() stops at the shortest list, so the five result lists are
# assumed to have one entry per <li>; if any article lacked one of these
# elements the fields of later articles would be paired up incorrectly.
info = [
    {
        'title': title.get_text(),
        'desc': desc.get_text(),
        'rate': rate.get_text(),
        # cate.get_text() joins all category labels into one string; use
        # list(cate.stripped_strings) instead to get them as separate items.
        'cate': cate.get_text(),
        'image': image.get("src"),
    }
    for image, title, desc, rate, cate in zip(images, titles, descs, rates, cates)
]


# Report only the articles whose numeric rating is strictly greater than 3.
for i in info:
    if float(i['rate']) > 3:
        print(i['title'])

'''
body > div.main-content > ul > li:nth-of-type(1) > img
body > div.main-content > ul > li:nth-of-type(1) > div.article-info > h3 > a
body > div.main-content > ul > li:nth-of-type(1) > div.article-info > p.description
body > div.main-content > ul > li:nth-of-type(1) > div.rate > span
body > div.main-content > ul > li:nth-child(1) > div.article-info > p.meta-info > span:nth-child(1)
'''